
// A1_import_raw_data
//==============================================================================

// Description: This code imports the raw data on the number of firms per country
// and sector into Stata and merges as well as formats it

clear

cd "D:\data_replication"

// Import Company Data by Nace 1
//==============================================================================

import delimited data\number_firms\raw_data\sbs_na_2a_dade_1_Data.csv
save data\number_firms\raw_data\sbs_na_2a_dade_1_Data.dta, replace

import delimited data\number_firms\raw_data\sbs_na_2a_dfdn_1_Data.csv, clear 
save data\number_firms\raw_data\sbs_na_2a_dfdn_1_Data.dta, replace

import delimited data\number_firms\raw_data\sbs_na_2a_mi_1_Data.csv, clear 
save data\number_firms\raw_data\sbs_na_2a_mi_1_Data.dta, replace

use data\number_firms\raw_data\sbs_na_2a_dade_1_Data.dta, clear
append using data\number_firms\raw_data\sbs_na_2a_dfdn_1_Data.dta
append using data\number_firms\raw_data\sbs_na_2a_mi_1_Data.dta

keep if time == 2007

sort geo nace_r1
by geo nace_r1: keep if _n == 1                                                 // Code "D" (manufacturing) appears twice for each cty -> Deleted


// Keep only 4 digit industries and, if not available, 3-digit industries
//==============================================================================

gen ll = length(nace_r1)

keep if ll == 6 | ///
nace_r1 == "CA101" | ///
nace_r1 == "CA102" | ///
nace_r1 == "CA103" | ///
nace_r1 == "CA111" | ///
nace_r1 == "CA112" | ///
nace_r1 == "CA120" | ///
nace_r1 == "CB131" | ///
nace_r1 == "CB132" | ///
nace_r1 == "CB143" | ///
nace_r1 == "CB144" | ///
nace_r1 == "CB145" | ///
nace_r1 == "DA152" | ///
nace_r1 == "DA160" | ///
nace_r1 == "DB171" | ///
nace_r1 == "DB172" | ///
nace_r1 == "DB173" | ///
nace_r1 == "DB174" | ///
nace_r1 == "DB176" | ///
nace_r1 == "DB181" | ///
nace_r1 == "DB183" | ///
nace_r1 == "DC191" | ///
nace_r1 == "DC192" | ///
nace_r1 == "DC193" | ///
nace_r1 == "DD201" | ///
nace_r1 == "DD202" | ///
nace_r1 == "DD203" | ///
nace_r1 == "DD204" | ///
nace_r1 == "DF231" | ///
nace_r1 == "DF232" | ///
nace_r1 == "DF233" | ///
nace_r1 == "DG242" | ///
nace_r1 == "DG243" | ///
nace_r1 == "DG247" | ///
nace_r1 == "DI263" | ///
nace_r1 == "DI264" | ///
nace_r1 == "DI267" | ///
nace_r1 == "DJ271" | ///
nace_r1 == "DJ283" | ///
nace_r1 == "DJ284" | ///
nace_r1 == "DK296" | ///
nace_r1 == "DL311" | ///
nace_r1 == "DL312" | ///
nace_r1 == "DL313" | ///
nace_r1 == "DL314" | ///
nace_r1 == "DL315" | ///
nace_r1 == "DL321" | ///
nace_r1 == "DL322" | ///
nace_r1 == "DL323" | ///
nace_r1 == "DL331" | ///
nace_r1 == "DL332" | ///
nace_r1 == "DL333" | ///
nace_r1 == "DL334" | ///
nace_r1 == "DL335" | ///
nace_r1 == "DM341" | ///
nace_r1 == "DM342" | ///
nace_r1 == "DM343" | ///
nace_r1 == "DM352" | ///
nace_r1 == "DM353" | ///
nace_r1 == "DM355" | ///
nace_r1 == "DN363" | ///
nace_r1 == "DN364" | ///
nace_r1 == "DN365" | ///
nace_r1 == "DN371" | ///
nace_r1 == "DN372" 


// Drop Non-EU Countries
//==============================================================================

drop if geo == "Albania"
drop if geo == "Croatia"
drop if geo == "European Union (27 countries)"
drop if geo == "Norway"
drop if geo == "Switzerland"
drop if geo == "Former Yugoslav Republic of Macedonia, the"


// Assign declarant codes
//==============================================================================

gen declarant = 38 if geo == "Austria"
replace declarant = 17 if geo == "Belgium"
replace declarant = 68 if geo == "Bulgaria"
replace declarant = 600 if geo == "Cyprus"
replace declarant = 61 if geo == "Czech Republic"
replace declarant = 8 if geo == "Denmark"
replace declarant = 53 if geo == "Estonia"
replace declarant = 32 if geo == "Finland"
replace declarant = 1 if geo == "France"
replace declarant = 4 if geo == "Germany (until 1990 former territory of the FRG)"
replace declarant = 9 if geo == "Greece"
replace declarant = 64 if geo == "Hungary"
replace declarant = 7 if geo == "Ireland"
replace declarant = 5 if geo == "Italy" 
replace declarant = 54 if geo == "Latvia"
replace declarant = 55 if geo == "Lithuania"
replace declarant = 18 if geo == "Luxembourg"
replace declarant = 46 if geo == "Malta" 
replace declarant = 3 if geo == "Netherlands"
replace declarant = 60 if geo == "Poland"
replace declarant = 10 if geo == "Portugal"
replace declarant = 66 if geo == "Romania" 
replace declarant = 63 if geo == "Slovakia"
replace declarant = 91 if geo == "Slovenia"
replace declarant = 11 if geo == "Spain"
replace declarant = 30 if geo == "Sweden"
replace declarant = 6 if geo == "United Kingdom"


// Format and save
//==============================================================================

replace value = "." if value == ":"
replace value = subinstr(value, ",", "",.)
destring value, replace

rename time year

replace nace_r1 = substr(nace_r1, 3, 10)
destring nace_r1, replace

rename value companies
keep year declarant nace_r1 companies
order year declarant nace_r1 companies

save data\number_firms\companies_nace.dta, replace

